# 定义一个Markdown文档
from langchain_community.document_loaders import NotionDirectoryLoader#Notion加载器
from langchain.text_splitter import MarkdownHeaderTextSplitter#markdown分割器
if __name__ == '__main__':
    # Load the documents found under ./docs/Notion_DB with the Notion
    # directory loader.
    loader = NotionDirectoryLoader("./docs/Notion_DB")
    docs = loader.load()

    # Merge all page contents into a single Markdown string.
    # Pages are separated by a blank line rather than a single space: the
    # header splitter only recognises a header at the start of a line, so a
    # space separator would glue the first header of a page onto the last
    # line of the previous page whenever that page lacks a trailing newline.
    txt = "\n\n".join(d.page_content for d in docs)

    # Header markers to split on, each paired with the label passed to the
    # splitter for that header level.
    headers_to_split_on = [
        ("#", "Header 1"),
        ("##", "Header 2"),
    ]

    # Build the splitter and split the merged text.
    markdown_splitter = MarkdownHeaderTextSplitter(
        headers_to_split_on=headers_to_split_on
    )
    md_header_splits = markdown_splitter.split_text(txt)

    # Print the second chunk as a sample of the result. Guard the index so an
    # empty directory or a document with fewer than two sections reports the
    # situation instead of raising IndexError.
    if len(md_header_splits) > 1:
        print(md_header_splits[1])
    else:
        print(
            f"Expected at least 2 chunks, but the splitter produced "
            f"{len(md_header_splits)}."
        )